import re
from urllib.parse import urljoin

import requests
import xlwt
from bs4 import BeautifulSoup

# Goal: for every book, record its title, cover image, price, rating, stock,
# product description and category.
wb = xlwt.Workbook()  # workbook that accumulates the scraped rows
sh = wb.add_sheet('信息', cell_overwrite_ok=True)
start_url = 'http://books.toscrape.com/'
next_page_url = start_url
# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get(next_page_url, timeout=10)
r.raise_for_status()
bs = BeautifulSoup(r.text, 'html.parser')
# Collect the links of the sidebar navigation list; the crawl loop further
# down skips the first one and treats the rest as category pages.
ul_list = bs.find('ul', class_='nav nav-list')
sort_url = ul_list.find_all('a')

# Write the header row (row 0) of the sheet.
heads = ['图书名称', '封面图片', '价格', '评分', '库存', '产品介绍', '所属分类']
for col, head in enumerate(heads):
    sh.write(0, col, head)

# Contents: one sheet row per book, category by category.
REQUEST_TIMEOUT = 10  # seconds; keeps a stalled connection from hanging the run


def _fetch_soup(page_url):
    """Download ``page_url`` and return it parsed with BeautifulSoup.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond in time.
    """
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Parse the raw bytes so BeautifulSoup can detect the encoding from the
    # document itself. ``response.text`` falls back to ISO-8859-1 when the
    # Content-Type header names no charset, which garbles the "£" in prices.
    return BeautifulSoup(response.content, 'html.parser')


def _book_description(detail):
    """Return the product description text, or '' when the page has none."""
    marker = detail.find('div', id='product_description')
    if marker is None:
        return ''
    paragraph = marker.find_next_sibling('p')
    return paragraph.text if paragraph is not None else ''


row = 0  # row 0 holds the header, so the first book lands on row 1
for category_link in sort_url[1:]:
    # Resolve the category link against the site root.
    page_url = urljoin(start_url, category_link['href'])
    label = None
    # Walk every listing page of the category, following the "next" pager link.
    while page_url:
        listing = _fetch_soup(page_url)
        content = listing.find('div', class_='col-sm-8 col-md-9')
        if label is None:
            label = content.h1.text
            print("分类:", label)  # category name
        for book in content.find_all('article'):
            # Open the book's detail page; its href is relative to the listing.
            book_url = urljoin(page_url, book.h3.a['href'])
            detail = _fetch_soup(book_url)
            # Title
            name = detail.h1.text
            # Cover image (first image on the page), made absolute
            image = urljoin(book_url, detail.img['src'])
            # Price
            price = detail.find('p', class_='price_color').text
            # Rating: second CSS class of the listing entry's first <p>
            star = book.p['class'][1]
            # Stock: keep only the digits of the availability text
            stock_tag = detail.find('p', class_="instock availability")
            in_stock = re.sub(r"\D", "", stock_tag.text)
            # Product description (empty when the page has none)
            description = _book_description(detail)
            item = [name, image, price, star, in_stock, description, label]
            print(item)
            row += 1
            for col, value in enumerate(item):
                sh.write(row, col, value)
        next_item = listing.find('li', class_='next')
        if next_item is not None and next_item.a is not None:
            page_url = urljoin(page_url, next_item.a['href'])
        else:
            page_url = None

wb.save('./books.xls')






